[Headers][X86] Update MMX arithmetic intrinsics to be used in constexpr #152296
Conversation
Update the easy add/sub/mul/logic/cmp/scalar_to_vector intrinsics to be constexpr compatible. I'm not expecting anyone to be very interested in using MMX intrinsics, but they're smaller than the other types and are useful for testing the constexpr handling and test methods before applying them to the SSE/AVX2/AVX512 intrinsics.
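For illustration, a minimal sketch (not part of the patch, and assuming a C++ translation unit in which the header's *_CONSTEXPR attribute macros expand to constexpr) of how an updated intrinsic can now be folded at compile time, using the header-internal __v4hi vector typedef the same way the new tests do:
#include <mmintrin.h>
// Element-wise 16-bit adds, evaluated entirely by the constant interpreter.
constexpr __m64 sum = _mm_add_pi16((__m64)(__v4hi){+1, -2, +3, -4},
                                   (__m64)(__v4hi){-10, +8, +6, -4});
static_assert(((__v4hi)sum)[0] == -9 && ((__v4hi)sum)[1] == +6 &&
              ((__v4hi)sum)[2] == +9 && ((__v4hi)sum)[3] == -8,
              "folds to {-9, +6, +9, -8} at compile time");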
@llvm/pr-subscribers-clang
Author: Simon Pilgrim (RKSimon)
Changes: Update the easy add/sub/mul/logic/cmp/scalar_to_vector intrinsics to be constexpr compatible. I'm not expecting anyone to be very interested in using MMX intrinsics, but they're smaller than the other types and are useful for testing the constexpr handling and test methods before we start applying them to the SSE/AVX2/AVX512 intrinsics.
Full diff: https://github.com/llvm/llvm-project/pull/152296.diff
3 Files Affected:
diff --git a/clang/lib/Headers/emmintrin.h b/clang/lib/Headers/emmintrin.h
index 78e8a422db4c1..06d08a486a954 100644
--- a/clang/lib/Headers/emmintrin.h
+++ b/clang/lib/Headers/emmintrin.h
@@ -2127,8 +2127,9 @@ _mm_add_epi32(__m128i __a, __m128i __b) {
/// \param __b
/// A 64-bit integer.
/// \returns A 64-bit integer containing the sum of both parameters.
-static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_add_si64(__m64 __a, __m64 __b) {
- return (__m64)(((unsigned long long)__a) + ((unsigned long long)__b));
+static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_add_si64(__m64 __a,
+ __m64 __b) {
+ return (__m64)(((__v1du)__a)[0] + ((__v1du)__b)[0]);
}
/// Adds the corresponding elements of two 128-bit vectors of [2 x i64],
@@ -2557,8 +2558,9 @@ _mm_sub_epi32(__m128i __a, __m128i __b) {
/// A 64-bit integer vector containing the subtrahend.
/// \returns A 64-bit integer vector containing the difference of the values in
/// the operands.
-static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_sub_si64(__m64 __a, __m64 __b) {
- return (__m64)((unsigned long long)__a - (unsigned long long)__b);
+static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_sub_si64(__m64 __a,
+ __m64 __b) {
+ return (__m64)(((__v1du)__a)[0] - ((__v1du)__b)[0]);
}
/// Subtracts the corresponding elements of two [2 x i64] vectors.
diff --git a/clang/lib/Headers/mmintrin.h b/clang/lib/Headers/mmintrin.h
index dc0fa5c523eeb..5a02a45512345 100644
--- a/clang/lib/Headers/mmintrin.h
+++ b/clang/lib/Headers/mmintrin.h
@@ -85,7 +85,7 @@ _mm_empty(void) {
/// A 32-bit integer value.
/// \returns A 64-bit integer vector. The lower 32 bits contain the value of the
/// parameter. The upper 32 bits are set to 0.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
_mm_cvtsi32_si64(int __i)
{
return __extension__ (__m64)(__v2si){__i, 0};
@@ -102,7 +102,7 @@ _mm_cvtsi32_si64(int __i)
/// A 64-bit integer vector.
/// \returns A 32-bit signed integer value containing the lower 32 bits of the
/// parameter.
-static __inline__ int __DEFAULT_FN_ATTRS_SSE2
+static __inline__ int __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
_mm_cvtsi64_si32(__m64 __m)
{
return ((__v2si)__m)[0];
@@ -118,10 +118,10 @@ _mm_cvtsi64_si32(__m64 __m)
/// A 64-bit signed integer.
/// \returns A 64-bit integer vector containing the same bitwise pattern as the
/// parameter.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
_mm_cvtsi64_m64(long long __i)
{
- return (__m64)__i;
+ return __extension__ (__m64)(__v1di){__i};
}
/// Casts a 64-bit integer vector into a 64-bit signed integer value.
@@ -134,10 +134,10 @@ _mm_cvtsi64_m64(long long __i)
/// A 64-bit integer vector.
/// \returns A 64-bit signed integer containing the same bitwise pattern as the
/// parameter.
-static __inline__ long long __DEFAULT_FN_ATTRS_SSE2
+static __inline__ long long __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
_mm_cvtm64_si64(__m64 __m)
{
- return (long long)__m;
+ return ((__v1di)__m)[0];
}
/// Converts, with saturation, 16-bit signed integers from both 64-bit integer
@@ -379,7 +379,7 @@ _mm_unpacklo_pi32(__m64 __m1, __m64 __m2)
/// A 64-bit integer vector of [8 x i8].
/// \returns A 64-bit integer vector of [8 x i8] containing the sums of both
/// parameters.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
_mm_add_pi8(__m64 __m1, __m64 __m2)
{
return (__m64)(((__v8qu)__m1) + ((__v8qu)__m2));
@@ -400,7 +400,7 @@ _mm_add_pi8(__m64 __m1, __m64 __m2)
/// A 64-bit integer vector of [4 x i16].
/// \returns A 64-bit integer vector of [4 x i16] containing the sums of both
/// parameters.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
_mm_add_pi16(__m64 __m1, __m64 __m2)
{
return (__m64)(((__v4hu)__m1) + ((__v4hu)__m2));
@@ -421,7 +421,7 @@ _mm_add_pi16(__m64 __m1, __m64 __m2)
/// A 64-bit integer vector of [2 x i32].
/// \returns A 64-bit integer vector of [2 x i32] containing the sums of both
/// parameters.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
_mm_add_pi32(__m64 __m1, __m64 __m2)
{
return (__m64)(((__v2su)__m1) + ((__v2su)__m2));
@@ -536,7 +536,7 @@ _mm_adds_pu16(__m64 __m1, __m64 __m2)
/// A 64-bit integer vector of [8 x i8] containing the subtrahends.
/// \returns A 64-bit integer vector of [8 x i8] containing the differences of
/// both parameters.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
_mm_sub_pi8(__m64 __m1, __m64 __m2)
{
return (__m64)(((__v8qu)__m1) - ((__v8qu)__m2));
@@ -557,7 +557,7 @@ _mm_sub_pi8(__m64 __m1, __m64 __m2)
/// A 64-bit integer vector of [4 x i16] containing the subtrahends.
/// \returns A 64-bit integer vector of [4 x i16] containing the differences of
/// both parameters.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
_mm_sub_pi16(__m64 __m1, __m64 __m2)
{
return (__m64)(((__v4hu)__m1) - ((__v4hu)__m2));
@@ -578,7 +578,7 @@ _mm_sub_pi16(__m64 __m1, __m64 __m2)
/// A 64-bit integer vector of [2 x i32] containing the subtrahends.
/// \returns A 64-bit integer vector of [2 x i32] containing the differences of
/// both parameters.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
_mm_sub_pi32(__m64 __m1, __m64 __m2)
{
return (__m64)(((__v2su)__m1) - ((__v2su)__m2));
@@ -745,7 +745,7 @@ _mm_mulhi_pi16(__m64 __m1, __m64 __m2)
/// A 64-bit integer vector of [4 x i16].
/// \returns A 64-bit integer vector of [4 x i16] containing the lower 16 bits
/// of the products of both parameters.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
_mm_mullo_pi16(__m64 __m1, __m64 __m2)
{
return (__m64)(((__v4hu)__m1) * ((__v4hu)__m2));
@@ -1134,7 +1134,7 @@ _mm_srli_si64(__m64 __m, int __count)
/// A 64-bit integer vector.
/// \returns A 64-bit integer vector containing the bitwise AND of both
/// parameters.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
_mm_and_si64(__m64 __m1, __m64 __m2)
{
return (__m64)(((__v1du)__m1) & ((__v1du)__m2));
@@ -1155,7 +1155,7 @@ _mm_and_si64(__m64 __m1, __m64 __m2)
/// A 64-bit integer vector.
/// \returns A 64-bit integer vector containing the bitwise AND of the second
/// parameter and the one's complement of the first parameter.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
_mm_andnot_si64(__m64 __m1, __m64 __m2)
{
return (__m64)(~((__v1du)__m1) & ((__v1du)__m2));
@@ -1173,7 +1173,7 @@ _mm_andnot_si64(__m64 __m1, __m64 __m2)
/// A 64-bit integer vector.
/// \returns A 64-bit integer vector containing the bitwise OR of both
/// parameters.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
_mm_or_si64(__m64 __m1, __m64 __m2)
{
return (__m64)(((__v1du)__m1) | ((__v1du)__m2));
@@ -1191,7 +1191,7 @@ _mm_or_si64(__m64 __m1, __m64 __m2)
/// A 64-bit integer vector.
/// \returns A 64-bit integer vector containing the bitwise exclusive OR of both
/// parameters.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
_mm_xor_si64(__m64 __m1, __m64 __m2)
{
return (__m64)(((__v1du)__m1) ^ ((__v1du)__m2));
@@ -1213,7 +1213,7 @@ _mm_xor_si64(__m64 __m1, __m64 __m2)
/// A 64-bit integer vector of [8 x i8].
/// \returns A 64-bit integer vector of [8 x i8] containing the comparison
/// results.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
_mm_cmpeq_pi8(__m64 __m1, __m64 __m2)
{
return (__m64)(((__v8qi)__m1) == ((__v8qi)__m2));
@@ -1235,7 +1235,7 @@ _mm_cmpeq_pi8(__m64 __m1, __m64 __m2)
/// A 64-bit integer vector of [4 x i16].
/// \returns A 64-bit integer vector of [4 x i16] containing the comparison
/// results.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
_mm_cmpeq_pi16(__m64 __m1, __m64 __m2)
{
return (__m64)(((__v4hi)__m1) == ((__v4hi)__m2));
@@ -1257,7 +1257,7 @@ _mm_cmpeq_pi16(__m64 __m1, __m64 __m2)
/// A 64-bit integer vector of [2 x i32].
/// \returns A 64-bit integer vector of [2 x i32] containing the comparison
/// results.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
_mm_cmpeq_pi32(__m64 __m1, __m64 __m2)
{
return (__m64)(((__v2si)__m1) == ((__v2si)__m2));
@@ -1279,7 +1279,7 @@ _mm_cmpeq_pi32(__m64 __m1, __m64 __m2)
/// A 64-bit integer vector of [8 x i8].
/// \returns A 64-bit integer vector of [8 x i8] containing the comparison
/// results.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
_mm_cmpgt_pi8(__m64 __m1, __m64 __m2)
{
/* This function always performs a signed comparison, but __v8qi is a char
@@ -1303,7 +1303,7 @@ _mm_cmpgt_pi8(__m64 __m1, __m64 __m2)
/// A 64-bit integer vector of [4 x i16].
/// \returns A 64-bit integer vector of [4 x i16] containing the comparison
/// results.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
_mm_cmpgt_pi16(__m64 __m1, __m64 __m2)
{
return (__m64)((__v4hi)__m1 > (__v4hi)__m2);
@@ -1325,7 +1325,7 @@ _mm_cmpgt_pi16(__m64 __m1, __m64 __m2)
/// A 64-bit integer vector of [2 x i32].
/// \returns A 64-bit integer vector of [2 x i32] containing the comparison
/// results.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
_mm_cmpgt_pi32(__m64 __m1, __m64 __m2)
{
return (__m64)((__v2si)__m1 > (__v2si)__m2);
diff --git a/clang/test/CodeGen/X86/mmx-builtins.c b/clang/test/CodeGen/X86/mmx-builtins.c
index 52cbe45ca238b..4507d2051aa6a 100644
--- a/clang/test/CodeGen/X86/mmx-builtins.c
+++ b/clang/test/CodeGen/X86/mmx-builtins.c
@@ -34,24 +34,28 @@ __m64 test_mm_add_pi8(__m64 a, __m64 b) {
// CHECK: add <8 x i8> {{%.*}}, {{%.*}}
return _mm_add_pi8(a, b);
}
+TEST_CONSTEXPR(match_v8qi(_mm_add_pi8(_mm_setr_pi8(-3, +2, -1, 0, +1, -2, +3, -4), _mm_setr_pi8(-18, +16, -14, +12, -10, +8, +6, -4)), -21, +18, -15, +12, -9, +6, +9, -8));
__m64 test_mm_add_pi16(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_add_pi16
// CHECK: add <4 x i16> {{%.*}}, {{%.*}}
return _mm_add_pi16(a, b);
}
+TEST_CONSTEXPR(match_v4hi(_mm_add_pi16((__m64)(__v4hi){+1, -2, +3, -4}, (__m64)(__v4hi){-10, +8, +6, -4}), -9, +6, +9, -8));
__m64 test_mm_add_pi32(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_add_pi32
// CHECK: add <2 x i32> {{%.*}}, {{%.*}}
return _mm_add_pi32(a, b);
}
+TEST_CONSTEXPR(match_v2si(_mm_add_pi32((__m64)(__v2si){+5, -3}, (__m64)(__v2si){-9, +8}), -4, +5));
__m64 test_mm_add_si64(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_add_si64
// CHECK: add i64 {{%.*}}, {{%.*}}
return _mm_add_si64(a, b);
}
+TEST_CONSTEXPR(match_v1di(_mm_add_si64((__m64)(__v1di){+42}, (__m64)(__v1di){-100}), -58));
__m64 test_mm_adds_pi8(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_adds_pi8
@@ -88,6 +92,7 @@ __m64 test_mm_and_si64(__m64 a, __m64 b) {
// CHECK: and <1 x i64> {{%.*}}, {{%.*}}
return _mm_and_si64(a, b);
}
+TEST_CONSTEXPR(match_v4hi(_mm_and_si64((__m64)(__v4hi){0, -1, 0, -1}, (__m64)(__v4hi){0, 0, -1, -1}), 0, 0, 0, -1));
__m64 test_mm_andnot_si64(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_andnot_si64
@@ -95,6 +100,7 @@ __m64 test_mm_andnot_si64(__m64 a, __m64 b) {
// CHECK: and <1 x i64> [[TMP]], {{%.*}}
return _mm_andnot_si64(a, b);
}
+TEST_CONSTEXPR(match_v4hi(_mm_andnot_si64((__m64)(__v4hi){0, -1, 0, -1}, (__m64)(__v4hi){0, 0, -1, -1}), 0, 0, -1, 0));
__m64 test_mm_avg_pu8(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_avg_pu8
@@ -114,6 +120,7 @@ __m64 test_mm_cmpeq_pi8(__m64 a, __m64 b) {
// CHECK-NEXT: {{%.*}} = sext <8 x i1> [[CMP]] to <8 x i8>
return _mm_cmpeq_pi8(a, b);
}
+TEST_CONSTEXPR(match_v8qi(_mm_cmpeq_pi8(_mm_setr_pi8(-3, +2, -1, 0, +1, -2, +3, -4), _mm_setr_pi8(-3, -2, +1, 0, -1, -2, -3, -4)), -1, 0, 0, -1, 0, -1, 0, -1));
__m64 test_mm_cmpeq_pi16(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_cmpeq_pi16
@@ -121,6 +128,7 @@ __m64 test_mm_cmpeq_pi16(__m64 a, __m64 b) {
// CHECK-NEXT: {{%.*}} = sext <4 x i1> [[CMP]] to <4 x i16>
return _mm_cmpeq_pi16(a, b);
}
+TEST_CONSTEXPR(match_v4hi(_mm_cmpeq_pi16((__m64)(__v4hi){+1, -2, +3, -4}, (__m64)(__v4hi){-1, -1, +3, +4}), 0, 0, -1, 0));
__m64 test_mm_cmpeq_pi32(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_cmpeq_pi32
@@ -128,6 +136,7 @@ __m64 test_mm_cmpeq_pi32(__m64 a, __m64 b) {
// CHECK-NEXT: {{%.*}} = sext <2 x i1> [[CMP]] to <2 x i32>
return _mm_cmpeq_pi32(a, b);
}
+TEST_CONSTEXPR(match_v2si(_mm_cmpeq_pi32((__m64)(__v2si){+5, -3}, (__m64)(__v2si){-5, -3}), 0, -1));
__m64 test_mm_cmpgt_pi8(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_cmpgt_pi8
@@ -135,6 +144,7 @@ __m64 test_mm_cmpgt_pi8(__m64 a, __m64 b) {
// CHECK-NEXT: {{%.*}} = sext <8 x i1> [[CMP]] to <8 x i8>
return _mm_cmpgt_pi8(a, b);
}
+TEST_CONSTEXPR(match_v8qi(_mm_cmpgt_pi8(_mm_setr_pi8(-3, +2, -1, 0, +1, -2, +3, -4), _mm_setr_pi8(-3, -2, +1, 0, -1, -2, -3, -4)), 0, -1, 0, 0, -1, 0, -1, 0));
__m64 test_mm_cmpgt_pi16(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_cmpgt_pi16
@@ -142,6 +152,7 @@ __m64 test_mm_cmpgt_pi16(__m64 a, __m64 b) {
// CHECK-NEXT: {{%.*}} = sext <4 x i1> [[CMP]] to <4 x i16>
return _mm_cmpgt_pi16(a, b);
}
+TEST_CONSTEXPR(match_v4hi(_mm_cmpgt_pi16((__m64)(__v4hi){+1, -2, +3, -4}, (__m64)(__v4hi){-1, -1, +3, +4}), -1, 0, 0, 0));
__m64 test_mm_cmpgt_pi32(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_cmpgt_pi32
@@ -149,6 +160,7 @@ __m64 test_mm_cmpgt_pi32(__m64 a, __m64 b) {
// CHECK-NEXT: {{%.*}} = sext <2 x i1> [[CMP]] to <2 x i32>
return _mm_cmpgt_pi32(a, b);
}
+TEST_CONSTEXPR(match_v2si(_mm_cmpgt_pi32((__m64)(__v2si){+5, -3}, (__m64)(__v2si){-5, -3}), -1, 0));
__m128 test_mm_cvt_pi2ps(__m128 a, __m64 b) {
// CHECK-LABEL: test_mm_cvt_pi2ps
@@ -210,12 +222,14 @@ __m64 test_mm_cvtsi32_si64(int a) {
// CHECK: insertelement <2 x i32>
return _mm_cvtsi32_si64(a);
}
+TEST_CONSTEXPR(match_v2si(_mm_cvtsi32_si64(-127), -127, 0));
int test_mm_cvtsi64_si32(__m64 a) {
// CHECK-LABEL: test_mm_cvtsi64_si32
// CHECK: extractelement <2 x i32>
return _mm_cvtsi64_si32(a);
}
+TEST_CONSTEXPR(_mm_cvtsi64_si32((__m64)(__v4hi){-2, 0, -1, -1}) == 65534);
__m64 test_mm_cvttpd_pi32(__m128d a) {
// CHECK-LABEL: test_mm_cvttpd_pi32
@@ -240,11 +254,13 @@ __m64 test_m_from_int(int a) {
// CHECK: insertelement <2 x i32>
return _m_from_int(a);
}
+TEST_CONSTEXPR(match_v2si(_m_from_int(255), 255, 0));
__m64 test_m_from_int64(long long a) {
// CHECK-LABEL: test_m_from_int64
return _m_from_int64(a);
}
+TEST_CONSTEXPR(match_v1di(_m_from_int64(-65536), -65536LL));
__m64 test_mm_hadd_pi16(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_hadd_pi16
@@ -367,12 +383,14 @@ __m64 test_mm_mullo_pi16(__m64 a, __m64 b) {
// CHECK: mul <4 x i16> {{%.*}}, {{%.*}}
return _mm_mullo_pi16(a, b);
}
+TEST_CONSTEXPR(match_v4hi(_mm_mullo_pi16((__m64)(__v4hi){+1, -2, +3, -4}, (__m64)(__v4hi){-10, +8, +6, -4}), -10, -16, +18, +16));
__m64 test_mm_or_si64(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_or_si64
// CHECK: or <1 x i64> {{%.*}}, {{%.*}}
return _mm_or_si64(a, b);
}
+TEST_CONSTEXPR(match_v4hi(_mm_or_si64((__m64)(__v4hi){0, -1, 0, -1}, (__m64)(__v4hi){0, 0, -1, -1}), 0, -1, -1, -1));
__m64 test_mm_packs_pi16(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_packs_pi16
@@ -644,24 +662,28 @@ __m64 test_mm_sub_pi8(__m64 a, __m64 b) {
// CHECK: sub <8 x i8> {{%.*}}, {{%.*}}
return _mm_sub_pi8(a, b);
}
+TEST_CONSTEXPR(match_v8qi(_mm_sub_pi8(_mm_setr_pi8(-3, +2, -1, 0, +1, -2, +3, -4), _mm_setr_pi8(-18, +16, -14, +12, -10, +8, +6, -4)), +15, -14, +13, -12, +11, -10, -3, 0));
__m64 test_mm_sub_pi16(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_sub_pi16
// CHECK: sub <4 x i16> {{%.*}}, {{%.*}}
return _mm_sub_pi16(a, b);
}
+TEST_CONSTEXPR(match_v4hi(_mm_sub_pi16((__m64)(__v4hi){+1, -2, +3, -4}, (__m64)(__v4hi){-10, +8, +6, -4}), +11, -10, -3, 0));
__m64 test_mm_sub_pi32(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_sub_pi32
// CHECK: sub <2 x i32> {{%.*}}, {{%.*}}
return _mm_sub_pi32(a, b);
}
+TEST_CONSTEXPR(match_v2si(_mm_sub_pi32((__m64)(__v2si){+5, -3}, (__m64)(__v2si){-9, +8}), +14, -11));
__m64 test_mm_sub_si64(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_sub_si64
// CHECK: sub i64 {{%.*}}, {{%.*}}
return _mm_sub_si64(a, b);
}
+TEST_CONSTEXPR(match_v1di(_mm_sub_si64((__m64)(__v1di){+42}, (__m64)(__v1di){-100}), +142));
__m64 test_mm_subs_pi8(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_subs_pi8
@@ -692,11 +714,13 @@ int test_m_to_int(__m64 a) {
// CHECK: extractelement <2 x i32>
return _m_to_int(a);
}
+TEST_CONSTEXPR(_m_to_int((__m64)(__v4hi){0, -2, -1, -1}) == -131072);
long long test_m_to_int64(__m64 a) {
// CHECK-LABEL: test_m_to_int64
return _m_to_int64(a);
}
+TEST_CONSTEXPR(_m_to_int64((__m64)(__v4hi){0, -2, 0, -1}) == -281470681874432LL);
__m64 test_mm_unpackhi_pi8(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_unpackhi_pi8
@@ -739,3 +763,4 @@ __m64 test_mm_xor_si64(__m64 a, __m64 b) {
// CHECK: xor <1 x i64> {{%.*}}, {{%.*}}
return _mm_xor_si64(a, b);
}
+TEST_CONSTEXPR(match_v4hi(_mm_xor_si64((__m64)(__v4hi){0, -1, 0, -1}, (__m64)(__v4hi){0, 0, -1, -1}), 0, -1, -1, 0));
\ No newline at end of file
@llvm/pr-subscribers-backend-x86
Author: Simon Pilgrim (RKSimon)
Changes: (same summary and full diff as the @llvm/pr-subscribers-clang comment above.)
You can test this locally with the following command:
git-clang-format --diff HEAD~1 HEAD --extensions h,c -- clang/lib/Headers/emmintrin.h clang/lib/Headers/mmintrin.h clang/test/CodeGen/X86/mmx-builtins.c
View the diff from clang-format here.
diff --git a/clang/lib/Headers/mmintrin.h b/clang/lib/Headers/mmintrin.h
index 5a02a4551..b17d8b1bf 100644
--- a/clang/lib/Headers/mmintrin.h
+++ b/clang/lib/Headers/mmintrin.h
@@ -86,9 +86,8 @@ _mm_empty(void) {
/// \returns A 64-bit integer vector. The lower 32 bits contain the value of the
/// parameter. The upper 32 bits are set to 0.
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
-_mm_cvtsi32_si64(int __i)
-{
- return __extension__ (__m64)(__v2si){__i, 0};
+_mm_cvtsi32_si64(int __i) {
+ return __extension__(__m64)(__v2si){__i, 0};
}
/// Returns the lower 32 bits of a 64-bit integer vector as a 32-bit
@@ -103,9 +102,8 @@ _mm_cvtsi32_si64(int __i)
/// \returns A 32-bit signed integer value containing the lower 32 bits of the
/// parameter.
static __inline__ int __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
-_mm_cvtsi64_si32(__m64 __m)
-{
- return ((__v2si)__m)[0];
+_mm_cvtsi64_si32(__m64 __m) {
+ return ((__v2si)__m)[0];
}
/// Casts a 64-bit signed integer value into a 64-bit integer vector.
@@ -119,9 +117,8 @@ _mm_cvtsi64_si32(__m64 __m)
/// \returns A 64-bit integer vector containing the same bitwise pattern as the
/// parameter.
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
-_mm_cvtsi64_m64(long long __i)
-{
- return __extension__ (__m64)(__v1di){__i};
+_mm_cvtsi64_m64(long long __i) {
+ return __extension__(__m64)(__v1di){__i};
}
/// Casts a 64-bit integer vector into a 64-bit signed integer value.
@@ -135,9 +132,8 @@ _mm_cvtsi64_m64(long long __i)
/// \returns A 64-bit signed integer containing the same bitwise pattern as the
/// parameter.
static __inline__ long long __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
-_mm_cvtm64_si64(__m64 __m)
-{
- return ((__v1di)__m)[0];
+_mm_cvtm64_si64(__m64 __m) {
+ return ((__v1di)__m)[0];
}
/// Converts, with saturation, 16-bit signed integers from both 64-bit integer
@@ -380,9 +376,8 @@ _mm_unpacklo_pi32(__m64 __m1, __m64 __m2)
/// \returns A 64-bit integer vector of [8 x i8] containing the sums of both
/// parameters.
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
-_mm_add_pi8(__m64 __m1, __m64 __m2)
-{
- return (__m64)(((__v8qu)__m1) + ((__v8qu)__m2));
+_mm_add_pi8(__m64 __m1, __m64 __m2) {
+ return (__m64)(((__v8qu)__m1) + ((__v8qu)__m2));
}
/// Adds each 16-bit integer element of the first 64-bit integer vector
@@ -401,9 +396,8 @@ _mm_add_pi8(__m64 __m1, __m64 __m2)
/// \returns A 64-bit integer vector of [4 x i16] containing the sums of both
/// parameters.
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
-_mm_add_pi16(__m64 __m1, __m64 __m2)
-{
- return (__m64)(((__v4hu)__m1) + ((__v4hu)__m2));
+_mm_add_pi16(__m64 __m1, __m64 __m2) {
+ return (__m64)(((__v4hu)__m1) + ((__v4hu)__m2));
}
/// Adds each 32-bit integer element of the first 64-bit integer vector
@@ -422,9 +416,8 @@ _mm_add_pi16(__m64 __m1, __m64 __m2)
/// \returns A 64-bit integer vector of [2 x i32] containing the sums of both
/// parameters.
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
-_mm_add_pi32(__m64 __m1, __m64 __m2)
-{
- return (__m64)(((__v2su)__m1) + ((__v2su)__m2));
+_mm_add_pi32(__m64 __m1, __m64 __m2) {
+ return (__m64)(((__v2su)__m1) + ((__v2su)__m2));
}
/// Adds, with saturation, each 8-bit signed integer element of the first
@@ -537,9 +530,8 @@ _mm_adds_pu16(__m64 __m1, __m64 __m2)
/// \returns A 64-bit integer vector of [8 x i8] containing the differences of
/// both parameters.
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
-_mm_sub_pi8(__m64 __m1, __m64 __m2)
-{
- return (__m64)(((__v8qu)__m1) - ((__v8qu)__m2));
+_mm_sub_pi8(__m64 __m1, __m64 __m2) {
+ return (__m64)(((__v8qu)__m1) - ((__v8qu)__m2));
}
/// Subtracts each 16-bit integer element of the second 64-bit integer
@@ -558,9 +550,8 @@ _mm_sub_pi8(__m64 __m1, __m64 __m2)
/// \returns A 64-bit integer vector of [4 x i16] containing the differences of
/// both parameters.
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
-_mm_sub_pi16(__m64 __m1, __m64 __m2)
-{
- return (__m64)(((__v4hu)__m1) - ((__v4hu)__m2));
+_mm_sub_pi16(__m64 __m1, __m64 __m2) {
+ return (__m64)(((__v4hu)__m1) - ((__v4hu)__m2));
}
/// Subtracts each 32-bit integer element of the second 64-bit integer
@@ -579,9 +570,8 @@ _mm_sub_pi16(__m64 __m1, __m64 __m2)
/// \returns A 64-bit integer vector of [2 x i32] containing the differences of
/// both parameters.
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
-_mm_sub_pi32(__m64 __m1, __m64 __m2)
-{
- return (__m64)(((__v2su)__m1) - ((__v2su)__m2));
+_mm_sub_pi32(__m64 __m1, __m64 __m2) {
+ return (__m64)(((__v2su)__m1) - ((__v2su)__m2));
}
/// Subtracts, with saturation, each 8-bit signed integer element of the second
@@ -746,9 +736,8 @@ _mm_mulhi_pi16(__m64 __m1, __m64 __m2)
/// \returns A 64-bit integer vector of [4 x i16] containing the lower 16 bits
/// of the products of both parameters.
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
-_mm_mullo_pi16(__m64 __m1, __m64 __m2)
-{
- return (__m64)(((__v4hu)__m1) * ((__v4hu)__m2));
+_mm_mullo_pi16(__m64 __m1, __m64 __m2) {
+ return (__m64)(((__v4hu)__m1) * ((__v4hu)__m2));
}
/// Left-shifts each 16-bit signed integer element of the first
@@ -1135,9 +1124,8 @@ _mm_srli_si64(__m64 __m, int __count)
/// \returns A 64-bit integer vector containing the bitwise AND of both
/// parameters.
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
-_mm_and_si64(__m64 __m1, __m64 __m2)
-{
- return (__m64)(((__v1du)__m1) & ((__v1du)__m2));
+_mm_and_si64(__m64 __m1, __m64 __m2) {
+ return (__m64)(((__v1du)__m1) & ((__v1du)__m2));
}
/// Performs a bitwise NOT of the first 64-bit integer vector, and then
@@ -1156,9 +1144,8 @@ _mm_and_si64(__m64 __m1, __m64 __m2)
/// \returns A 64-bit integer vector containing the bitwise AND of the second
/// parameter and the one's complement of the first parameter.
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
-_mm_andnot_si64(__m64 __m1, __m64 __m2)
-{
- return (__m64)(~((__v1du)__m1) & ((__v1du)__m2));
+_mm_andnot_si64(__m64 __m1, __m64 __m2) {
+ return (__m64)(~((__v1du)__m1) & ((__v1du)__m2));
}
/// Performs a bitwise OR of two 64-bit integer vectors.
@@ -1174,9 +1161,8 @@ _mm_andnot_si64(__m64 __m1, __m64 __m2)
/// \returns A 64-bit integer vector containing the bitwise OR of both
/// parameters.
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
-_mm_or_si64(__m64 __m1, __m64 __m2)
-{
- return (__m64)(((__v1du)__m1) | ((__v1du)__m2));
+_mm_or_si64(__m64 __m1, __m64 __m2) {
+ return (__m64)(((__v1du)__m1) | ((__v1du)__m2));
}
/// Performs a bitwise exclusive OR of two 64-bit integer vectors.
@@ -1192,9 +1178,8 @@ _mm_or_si64(__m64 __m1, __m64 __m2)
/// \returns A 64-bit integer vector containing the bitwise exclusive OR of both
/// parameters.
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
-_mm_xor_si64(__m64 __m1, __m64 __m2)
-{
- return (__m64)(((__v1du)__m1) ^ ((__v1du)__m2));
+_mm_xor_si64(__m64 __m1, __m64 __m2) {
+ return (__m64)(((__v1du)__m1) ^ ((__v1du)__m2));
}
/// Compares the 8-bit integer elements of two 64-bit integer vectors of
@@ -1214,9 +1199,8 @@ _mm_xor_si64(__m64 __m1, __m64 __m2)
/// \returns A 64-bit integer vector of [8 x i8] containing the comparison
/// results.
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
-_mm_cmpeq_pi8(__m64 __m1, __m64 __m2)
-{
- return (__m64)(((__v8qi)__m1) == ((__v8qi)__m2));
+_mm_cmpeq_pi8(__m64 __m1, __m64 __m2) {
+ return (__m64)(((__v8qi)__m1) == ((__v8qi)__m2));
}
/// Compares the 16-bit integer elements of two 64-bit integer vectors of
@@ -1236,9 +1220,8 @@ _mm_cmpeq_pi8(__m64 __m1, __m64 __m2)
/// \returns A 64-bit integer vector of [4 x i16] containing the comparison
/// results.
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
-_mm_cmpeq_pi16(__m64 __m1, __m64 __m2)
-{
- return (__m64)(((__v4hi)__m1) == ((__v4hi)__m2));
+_mm_cmpeq_pi16(__m64 __m1, __m64 __m2) {
+ return (__m64)(((__v4hi)__m1) == ((__v4hi)__m2));
}
/// Compares the 32-bit integer elements of two 64-bit integer vectors of
@@ -1258,9 +1241,8 @@ _mm_cmpeq_pi16(__m64 __m1, __m64 __m2)
/// \returns A 64-bit integer vector of [2 x i32] containing the comparison
/// results.
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
-_mm_cmpeq_pi32(__m64 __m1, __m64 __m2)
-{
- return (__m64)(((__v2si)__m1) == ((__v2si)__m2));
+_mm_cmpeq_pi32(__m64 __m1, __m64 __m2) {
+ return (__m64)(((__v2si)__m1) == ((__v2si)__m2));
}
/// Compares the 8-bit integer elements of two 64-bit integer vectors of
@@ -1280,8 +1262,7 @@ _mm_cmpeq_pi32(__m64 __m1, __m64 __m2)
/// \returns A 64-bit integer vector of [8 x i8] containing the comparison
/// results.
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
-_mm_cmpgt_pi8(__m64 __m1, __m64 __m2)
-{
+_mm_cmpgt_pi8(__m64 __m1, __m64 __m2) {
/* This function always performs a signed comparison, but __v8qi is a char
which may be signed or unsigned, so use __v8qs. */
return (__m64)((__v8qs)__m1 > (__v8qs)__m2);
@@ -1304,9 +1285,8 @@ _mm_cmpgt_pi8(__m64 __m1, __m64 __m2)
/// \returns A 64-bit integer vector of [4 x i16] containing the comparison
/// results.
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
-_mm_cmpgt_pi16(__m64 __m1, __m64 __m2)
-{
- return (__m64)((__v4hi)__m1 > (__v4hi)__m2);
+_mm_cmpgt_pi16(__m64 __m1, __m64 __m2) {
+ return (__m64)((__v4hi)__m1 > (__v4hi)__m2);
}
/// Compares the 32-bit integer elements of two 64-bit integer vectors of
@@ -1326,9 +1306,8 @@ _mm_cmpgt_pi16(__m64 __m1, __m64 __m2)
/// \returns A 64-bit integer vector of [2 x i32] containing the comparison
/// results.
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
-_mm_cmpgt_pi32(__m64 __m1, __m64 __m2)
-{
- return (__m64)((__v2si)__m1 > (__v2si)__m2);
+_mm_cmpgt_pi32(__m64 __m1, __m64 __m2) {
+ return (__m64)((__v2si)__m1 > (__v2si)__m2);
}
/// Constructs a 64-bit integer vector initialized to zero.
LGTM.
Update the easy add/sub/mul/logic/cmp/scalar_to_vector intrinsics to be constexpr compatible.
I'm not expecting anyone to be very interested in using MMX intrinsics, but they're smaller than the other types and are useful to test the constexpr handling and test methods before we start applying them to SSE/AVX2/AVX512 intrinsics.
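As a worked example (a sketch, not code from the patch) of the bit pattern behind one of the new compile-time checks, _m_to_int64((__m64)(__v4hi){0, -2, 0, -1}) == -281470681874432LL: packing the four 16-bit lanes with lane 0 least significant gives 0xFFFF0000FFFE0000, which is -281470681874432 when read back as a signed 64-bit value. A standalone C++14 check of that arithmetic, with a hypothetical pack_v4hi helper introduced purely for illustration:
#include <cstdint>
// Hypothetical helper (not from the patch): pack four 16-bit lanes, lane 0
// least significant, and reinterpret the result as a signed 64-bit value.
constexpr std::int64_t pack_v4hi(std::int16_t e0, std::int16_t e1,
                                 std::int16_t e2, std::int16_t e3) {
  const std::int16_t lanes[4] = {e0, e1, e2, e3};
  std::uint64_t bits = 0;
  for (int i = 0; i < 4; ++i)
    bits |= static_cast<std::uint64_t>(static_cast<std::uint16_t>(lanes[i]))
            << (16 * i);
  return static_cast<std::int64_t>(bits);  // 0xFFFF0000FFFE0000 for {0,-2,0,-1}
}
static_assert(pack_v4hi(0, -2, 0, -1) == -281470681874432LL,
              "matches the expected _m_to_int64 result in the new test");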